-- This script counts the number of faults (records in the event trace) per geographical node location.

-- author: cristina

REGISTER locationconcat.jar;

-- Load the tab-separated event trace. Only node_id and platform_id are used
-- by the steps below.
raw_event = LOAD '$inputDir1/event_trace.tab' USING PigStorage('\t') AS (
    event_id,
    component_id,
    node_id:int,
    platform_id:int,
    node_name,
    event_type,
    event_start_time,
    event_stop_time,
    event_end_reason:chararray
);

-- Load the tab-separated node table. Only node_id, platform_id and
-- node_location are used by the steps below.
raw_node = LOAD '$inputDir2/node.tab' USING PigStorage('\t') AS (
    node_id:int,
    platform_id:int,
    node_name,
    node_ip,
    node_location,
    timezone,
    proc_model,
    os_name,
    cores_per_proc,
    num_procs,
    mem_size,
    disk_size,
    up_bw,
    down_bw,
    metric_id,
    notes
);

-- Key both inputs by the flattened result of Concat(node_id, platform_id).
-- Every event carries a constant 1 so that summing the values gives a count.
-- NOTE(review): Concat is presumably the UDF shipped in the registered
-- locationconcat.jar - confirm its return type.
-- An earlier variant of this script keyed both relations on node_id alone.
event_keys = FOREACH raw_event
    GENERATE FLATTEN(Concat(node_id, platform_id)) AS id,
             1 AS value;
node_locations = FOREACH raw_node
    GENERATE FLATTEN(Concat(node_id, platform_id)) AS id,
             node_location AS location;

-- Inner join on the key compared as chararray. Events whose key has no
-- matching node record are dropped here.
events_with_node = JOIN event_keys BY (chararray)id,
                        node_locations BY (chararray)id;

-- Keep only the location and the per-event constant, then total per location.
location_hits = FOREACH events_with_node GENERATE location, value;
hits_by_location = GROUP location_hits BY (chararray)location;
faults_per_location = FOREACH hits_by_location
    GENERATE group, SUM(location_hits.value);

-- Write one (location, total) record per group with the default PigStorage
-- delimiter.
STORE faults_per_location INTO '$outputDir' USING PigStorage();

